# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)
library(plm)
library(lfe)
library(dplyr)
library(sandwich)

# === Panel A
rm(list = ls())

# regression table
data_bk = readRDS('input_data/stock_level_regression_table1.RDS')

# function to estimate Fama-Macbeth regressions and get Newey-West standard errors with 12 lags
p.fm_nw = function(ff){
  kLags = 12  
  
  # estimate by month
  tt = unique(data[, yyyymm])
  out = data.table()
  out_r2 = data.table()
  for (thisT in tt){
    ols = lm(ff, data[yyyymm == thisT])
    subout = data.table(yyyymm = thisT, var = names(ols$coef), est = ols$coef)
    subout = subout[!grepl('netret', var) & !grepl('catadjret', var) & !grepl('held_', var)]
    out = rbind(out, subout)
    out_r2 = rbind(out_r2, data.table(yyyymm = thisT, r2 = summary(ols)$adj))
  }
  
  # average estimate over each cross-section
  tt = out[, list(coef = mean(est)), var]
  
  # get newey-west standard errors
  for (i in 1:nrow(tt)){
    tt[i, se_neweyWest := sqrt(NeweyWest(lm(est ~ 1, out[var == tt[i, var]]), kLags))]
  }
  
  # format
  tt[, tstat := round(coef/se_neweyWest,2)]
  tt[, se_neweyWest := NULL]
  tt[, coef := round(coef, 2)]
  tt = tt[var != '(Intercept)'] # omit intercept in output
  
  # get num of obs and r2
  tt[, numObs := nrow(data)]
  tt[, r2 := round(out_r2[, mean(r2)],3)]
  return(tt)
}

# columns 1 to 3, full sample
data = copy(data_bk)
print(p.fm_nw('ret ~ mve + bm + operprof + agr + mom + reversal'))
print(p.fm_nw('ret ~ expSum_1 + mve + bm + operprof + agr + mom + reversal + netret_past + catadjret_past + held_1'))
print(p.fm_nw('ret ~ expSum_1_held + mve + bm + operprof + agr + mom + reversal + netret_past + catadjret_past + held_1'))

# columns 4 to 5, require at least 3 funds
data = copy(data_bk[num_funds_1 >= 3])
print(p.fm_nw('ret ~ expSum_1 + mve + bm + operprof + agr + mom + reversal + netret_past + catadjret_past + held_1'))
print(p.fm_nw('ret ~ expSum_1_held + mve + bm + operprof + agr + mom + reversal + netret_past + catadjret_past + held_1'))

# columns 6 to 7, Ex. microcaps
data = copy(data_bk[isMicrocap == 0])
print(p.fm_nw('ret ~ expSum_1 + mve + bm + operprof + agr + mom + reversal + netret_past + catadjret_past + held_1'))
print(p.fm_nw('ret ~ expSum_1_held + mve + bm + operprof + agr + mom + reversal + netret_past + catadjret_past + held_1'))


# === Panel B

# wrapper function to extract the main coefficient of interest
p.wrapper = function(thisV){
  ff = paste0('ret ~ ', thisV, ff0)
  out = p.fm_nw(ff)
  return(out[var == thisV, list(var = thisV, coef, tstat)])
}

# alternative definitions of the rating independent variable
vars0 = c('expSum_1','drating_3m_1','drating_6m_1','drating_9m_1','drating_12m_1')
vars = copy(vars0)
vars_held = paste0(vars0, '_held')
vars_hetero = paste0(vars0, '_hetero')
vars_hetero_held = paste0(vars0, '_hetero_held')
rm(vars0)

# == linear controls (first row in Panel B)
ff0 = ' + held_1 + mve + bm + operprof + agr + mom + reversal + netret_past + catadjret_past' # controls

# columns 1 and 4, full sample
data = copy(data_bk)
Reduce(rbind, lapply(vars, p.wrapper))
Reduce(rbind, lapply(vars_held, p.wrapper))

# columns 2 and 5, min 3 funds
data = copy(data_bk[num_funds_1 >= 3])
Reduce(rbind, lapply(vars, p.wrapper))
Reduce(rbind, lapply(vars_held, p.wrapper))

# columns 3 and 6, Ex. microcaps
data = copy(data_bk[isMicrocap == 0])
Reduce(rbind, lapply(vars, p.wrapper))
Reduce(rbind, lapply(vars_held, p.wrapper))

# == nonlinear controls (second row in Panel B)

ff0 = ' + held_1 + mve + bm + operprof + agr + mom + reversal + bin_netret_past + bin_catadjret_past' # controls

# columns 1 and 4, full sample
data = copy(data_bk)
Reduce(rbind, lapply(vars, p.wrapper))
Reduce(rbind, lapply(vars_held, p.wrapper))

# columns 2 and 5, min 3 funds
data = copy(data_bk[num_funds_1 >= 3])
Reduce(rbind, lapply(vars, p.wrapper))
Reduce(rbind, lapply(vars_held, p.wrapper))

# columns 3 and 6, Ex. microcaps
data = copy(data_bk[isMicrocap == 0])
Reduce(rbind, lapply(vars, p.wrapper))
Reduce(rbind, lapply(vars_held, p.wrapper))


# == heterogeneous response (third row in Panel B)
ff0 = ' + held_1 + mve + bm + operprof + agr + mom + reversal + netret_past + catadjret_past' # controls

# columns 1 and 4, full sample
data = copy(data_bk)
Reduce(rbind, lapply(vars_hetero, p.wrapper))
Reduce(rbind, lapply(vars_hetero_held, p.wrapper))

# columns 2 and 5, min 3 funds
data = copy(data_bk[num_funds_1 >= 3])
Reduce(rbind, lapply(vars_hetero, p.wrapper))
Reduce(rbind, lapply(vars_hetero_held, p.wrapper))

# columns 3 and 6, Ex. microcaps
data = copy(data_bk[isMicrocap == 0])
Reduce(rbind, lapply(vars_hetero, p.wrapper))
Reduce(rbind, lapply(vars_hetero_held, p.wrapper))
